package edu.isi.karma.imp.avro; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecord; import org.apache.avro.io.EncoderFactory; import org.apache.avro.io.JsonEncoder; import org.apache.commons.io.IOUtils; import org.codehaus.jackson.JsonFactory; import org.json.JSONException; import edu.isi.karma.imp.Import; import edu.isi.karma.imp.json.JsonImport; import edu.isi.karma.rep.Worksheet; import edu.isi.karma.rep.Workspace; import edu.isi.karma.webserver.KarmaException; public class AvroImport extends Import { private int maxNumLines; //TODO writing to a file each time is a hack, but avro seems to like it. private File file; private String encoding; private String worksheetName; public AvroImport (InputStream stream, String worksheetName, Workspace workspace, String encoding, int maxNumLines) throws IOException { super(worksheetName, workspace, encoding); this.maxNumLines = maxNumLines; this.encoding = encoding; this.worksheetName = worksheetName; this.file = File.createTempFile("karma-avro"+System.currentTimeMillis(), "avro"); FileOutputStream fw = new FileOutputStream(file); fw.write(IOUtils.toByteArray(stream)); fw.flush(); fw.close(); this.file.deleteOnExit(); } public AvroImport duplicate() { return new AvroImport(this.file, this.worksheetName, this.workspace, this.encoding, this.maxNumLines); } public AvroImport (String string, String worksheetName, Workspace workspace, String encoding, int maxNumLines) throws IOException { super(worksheetName, workspace, encoding); this.maxNumLines = maxNumLines; this.worksheetName = worksheetName; this.encoding = encoding; this.file = File.createTempFile("karma-avro"+System.currentTimeMillis(), "avro"); FileWriter fw = new FileWriter(file); fw.write(string); fw.flush(); fw.close(); this.file.deleteOnExit(); } public AvroImport (File file, String worksheetName, Workspace workspace, String encoding, int maxNumLines) { super(worksheetName, workspace, encoding); this.maxNumLines = maxNumLines; this.worksheetName = worksheetName; this.encoding = encoding; this.file = file; } @Override public Worksheet generateWorksheet() throws JSONException, IOException, KarmaException { DataFileReader<Void> schemareader = new DataFileReader<>(file, new GenericDatumReader<Void>()); Schema schema = schemareader.getSchema(); schemareader.close(); DataFileReader<GenericRecord> reader = new DataFileReader<>(file, new GenericDatumReader<GenericRecord>(schema)); ByteArrayOutputStream baos = new ByteArrayOutputStream(); baos.write('['); baos.write('\n'); GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(reader.getSchema()); while(reader.hasNext()) { GenericRecord record = reader.next(); JsonEncoder encoder = EncoderFactory.get().jsonEncoder(reader.getSchema(), new JsonFactory().createJsonGenerator(baos)).configure(baos); writer.write(record, encoder); encoder.flush(); if(reader.hasNext()) { baos.write(','); } } reader.close(); baos.write('\n'); baos.write(']'); baos.flush(); baos.close(); String json = new String(baos.toByteArray()); JsonImport jsonImport = new JsonImport(json, this.getFactory(), this.getWorksheet(), workspace, maxNumLines); return jsonImport.generateWorksheet(); } }